#!/usr/bin/env python
# -*- coding:utf-8 -*-

"""
@author zyx
@since 2022/2/12 08:54
@file: c04_爬取城市.py
"""
from lxml import etree
import requests

# Browser-like User-Agent sent with the request instead of the default
# python-requests one.
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/97.0.4692.71 Safari/537.36'
}
url = 'https://www.aqistudy.cn/historydata/'

# requests has no default timeout, so an unresponsive server would block the
# script forever; bound the wait explicitly.
response = requests.get(url=url, headers=headers, timeout=10)
# Fail loudly on 4xx/5xx responses instead of parsing an error page and
# silently printing an empty or wrong result.
response.raise_for_status()
page_text = response.text
tree = etree.HTML(page_text)

# Parse the "hot cities" list and the "all cities" list in a single query.
# The XPath union operator (|) lets one expression match both layouts, which
# makes the expression more general.
li_list = tree.xpath('//div[@class="bottom"]/ul/li | //div[@class="bottom"]/ul/div[2]/li')

# Collect the text of each item's link into a set so that cities appearing in
# both lists are de-duplicated.
city_set = set()
for li in li_list:
    names = li.xpath('./a/text()')
    # Skip items without a direct <a> text node rather than raising IndexError.
    if names:
        city_set.add(names[0])
print(city_set)
